In [ ]:
# Create a SystemML MLContext object
from systemml import MLContext, dml
ml = MLContext(sc)
The MNIST dataset contains labeled images of handwritten digits, where each example is a 28x28 pixel image of grayscale values in the range [0,255] stretched out as 784 pixels, and each label is one of 10 possible digits in [0,9]. Here, we download 60,000 training examples, and 10,000 test examples, where the format is "label, pixel_1, pixel_2, ..., pixel_n".
In [ ]:
%%sh
mkdir -p data/mnist/
cd data/mnist/
curl -O https://pjreddie.com/media/files/mnist_train.csv
curl -O https://pjreddie.com/media/files/mnist_test.csv
In [ ]:
training = """
source("nn/examples/mnist_softmax.dml") as mnist_softmax
# Read training data
data = read($data, format="csv")
n = nrow(data)
# Extract images and labels
images = data[,2:ncol(data)]
labels = data[,1]
# Scale images to [0,1], and one-hot encode the labels
images = images / 255.0
labels = table(seq(1, n), labels+1, n, 10)
# Split into training (55,000 examples) and validation (5,000 examples)
X = images[5001:nrow(images),]
X_val = images[1:5000,]
y = labels[5001:nrow(images),]
y_val = labels[1:5000,]
# Train
epochs = 1
[W, b] = mnist_softmax::train(X, y, X_val, y_val, epochs)
"""
script = dml(training).input("$data", "data/mnist/mnist_train.csv").output("W", "b")
W, b = ml.execute(script).get("W", "b")
In [ ]:
testing = """
source("nn/examples/mnist_softmax.dml") as mnist_softmax
# Read test data
data = read($data, format="csv")
n = nrow(data)
# Extract images and labels
X_test = data[,2:ncol(data)]
y_test = data[,1]
# Scale images to [0,1], and one-hot encode the labels
X_test = X_test / 255.0
y_test = table(seq(1, n), y_test+1, n, 10)
# Eval on test set
probs = mnist_softmax::predict(X_test, W, b)
[loss, accuracy] = mnist_softmax::eval(probs, y_test)
print("Test Accuracy: " + accuracy)
"""
script = dml(testing).input("$data", "data/mnist/mnist_test.csv", W=W, b=b)
ml.execute(script)
In [ ]:
W_df = W.toDF()
b_df = b.toDF()
W_df, b_df